Packages / R-options:

suppressPackageStartupMessages({
  library(plotly)
  library(gaston)
  library(dplyr)
})
# load engine's functions
sapply(FUN = source,
       X = list.files("../../src",
                      pattern = ".R$",
                      full.names = T))

#  R options
options(max.print = 20)

1 Get data

1.1 Inputs

genoFile <- '../../data/geno/breedGame_phasedGeno.vcf.gz'
crossTableFile <- '../../data/crossingTable/breedGame_small_crossTable.csv'
SNPcoordFile <- '../../data/SNPcoordinates/breedingGame_SNPcoord.csv'
markerEffectsFile <- '../../data/markerEffects/breedGame_markerEffects_2traits.json'

1.2 blup_estimations

blup_estimations <- calc_progenyBlupEstimation(
  genoFile = genoFile,
  crossTableFile = crossTableFile,
  SNPcoordFile = SNPcoordFile,
  markerEffectsFile = markerEffectsFile,
  # outFile = outFile
)

1.3 Crossing simulation

set.seed(1234)
markerEffects <- readMarkerEffects(markerEffectsFile)
crossSimOutFile <- crossingSimulation(
  genoFile = genoFile,
  crossTableFile = crossTableFile,
  SNPcoordFile = SNPcoordFile,
  nCross = 500)
simulated_genotypes <- gaston::read.vcf(crossSimOutFile)
## Warning in gaston::read.vcf(crossSimOutFile): NAs introduced by coercion
## Warning in gaston::read.vcf(crossSimOutFile): Some unknown chromosomes id's
## (try to set convert.chr = FALSE)
simulated_genotypes <- gaston::as.matrix(simulated_genotypes)
simulated_genotypes <- simulated_genotypes[, row.names(markerEffects$SNPeffects)]

simulation <- data.frame(
  simBV_1 = as.vector(simulated_genotypes %*% as.matrix(markerEffects$SNPeffects[,1])) + markerEffects$intercept[1],
  simBV_2 = as.vector(simulated_genotypes %*% as.matrix(markerEffects$SNPeffects[,2])) + markerEffects$intercept[2],
  Cross = as.factor(sub('-\\d+$', '', row.names(simulated_genotypes)))
)
colnames(simulation)[1] <- paste0("simBV_", names(markerEffects$intercept)[1])
colnames(simulation)[2] <- paste0("simBV_", names(markerEffects$intercept)[2])

2 1 Trait

trait <- "trait1"

simulation_t1 <- simulation[, c(paste0("simBV_", trait), "Cross")]
colnames(simulation_t1)[1] <- "simBV"


blup_estimations_t1 <- do.call(rbind, lapply(blup_estimations, function(blup_estim){
  data.frame(
    ind1 = blup_estim$ind1,
    ind2 = blup_estim$ind2,
    Cross = paste0(blup_estim$ind1, "_X_", blup_estim$ind2),
    blup_exp = blup_estim$blup_exp[[trait]],
    blup_var = blup_estim$blup_var[[trait]]
  )
}))

2.1 simulation VS blup estimations

p <- plotBlup_1trait(blup_estimations,
                     trait = "trait1",
                     y_axisName = "GV",
                     errorBarInterval = 0.95)

# add jittered markers of simulated BV
p <- p %>% add_boxplot(data = simulation_t1,
                       x = ~Cross,
                       y = ~simBV,
                       line = list(color = 'rgba(0,0,0,0)'),
                       # marker = list(color = 'rgba(31, 119, 180, 0.5)'),
                       fillcolor = 'rgba(0,0,0,0)',
                       name = "Simulated individuals' BV",
                       boxpoints = "all",
                       pointpos = 0,
                       jitter = 1,
                       hoverinfo = 'text',
                       text = apply(simulation_t1, 1, function(l) {
                         paste(names(l), ":", l, collapse = "\n")
                       }))

p
## Warning: 'box' objects don't have these attributes: 'mode', 'error_y'
## Valid attributes include:
## 'alignmentgroup', 'boxmean', 'boxpoints', 'customdata', 'customdatasrc', 'dx', 'dy', 'fillcolor', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'jitter', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'lowerfence', 'lowerfencesrc', 'marker', 'mean', 'meansrc', 'median', 'mediansrc', 'meta', 'metasrc', 'name', 'notched', 'notchspan', 'notchspansrc', 'notchwidth', 'offsetgroup', 'opacity', 'orientation', 'pointpos', 'q1', 'q1src', 'q3', 'q3src', 'quartilemethod', 'sd', 'sdsrc', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textsrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'upperfence', 'upperfencesrc', 'visible', 'whiskerwidth', 'width', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## 2023-11-06 16:38:06.627632 - r-plotBlup_1trait(): Check inputs ...
## 2023-11-06 16:38:06.627853 - r-plotBlup_1trait(): Check inputs DONE
## 2023-11-06 16:38:06.63057 - r-plotBlup_1trait(): sort x axis ...
## 2023-11-06 16:38:06.63064 - r-plotBlup_1trait(): sort x axis DONE
## 2023-11-06 16:38:06.630697 - r-plotBlup_1trait(): draw plot ...
## 2023-11-06 16:38:06.634845 - r-plotBlup_1trait(): draw plot DONE

2.1.1 Test observed mean

alpha <- 0.05
beta <- 0.95
pred_vs_sim <- full_join(simulation_t1, blup_estimations_t1, by = "Cross") %>% 
  filter(Cross != "Coll0659_X_Coll0425") %>% 
  group_by(Cross) %>%
  summarise(obs_mean =  mean(simBV),
            blup_exp = unique(blup_exp),
            obs_var = var(simBV),
            blup_var = unique(blup_var),
            p.value = t.test(x = simBV, mu = unique(blup_exp))$p.value,
            delta = power.t.test(n = 100, power = beta, sd = sqrt(unique(blup_var)), sig.level = alpha)$delta)
pred_vs_sim$adj.p.val <- p.adjust(pred_vs_sim$p.value, method = "bonferroni")
pred_vs_sim

idLine <- data.frame(mean = c(min(c(pred_vs_sim$blup_exp, pred_vs_sim$obs_mean)) - 1,
                                max(c(pred_vs_sim$blup_exp, pred_vs_sim$obs_mean)) + 1),
                     var = c(min(c(pred_vs_sim$blup_var, pred_vs_sim$obs_var)) - 1,
                                max(c(pred_vs_sim$blup_var, pred_vs_sim$obs_var)) + 1))

rmse <- sqrt(mean((pred_vs_sim$blup_exp - pred_vs_sim$obs_mean)^2))
  • RMSE = 0.141826
#plot mean
plot_ly(type = "scatter",
        mode = "markers",
        data = pred_vs_sim,
        x = ~blup_exp,
        y = ~obs_mean,
        name = "Observed mean vs prediction",
        hoverinfo = 'text',
        text = apply(pred_vs_sim, 1, function(l) {
          paste(names(l), ":", l, collapse = "\n")
        })
) %>%
  add_lines(inherit = FALSE,
            x = idLine$mean,
            y = idLine$mean,
            name = "Identity line")

2.1.2 Test normality

pred_vs_sim <- full_join(simulation_t1, blup_estimations_t1, by = "Cross") %>% 
  filter(Cross != "Coll0659_X_Coll0425") %>% 
  group_by(Cross) %>%
  summarise(obs_var = var(simBV),
            blup_var = unique(blup_var),
            p.value = ks.test(simBV, "pnorm", mean = unique(blup_exp), sd=sqrt(unique(blup_var)))$p.value)
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `p.value = ks.test(simBV, "pnorm", mean = unique(blup_exp), sd =
##   sqrt(unique(blup_var)))$p.value`.
## ℹ In group 1: `Cross = "Coll0659_X_F2_0001.0053"`.
## Caused by warning in `ks.test.default()`:
## ! ties should not be present for the Kolmogorov-Smirnov test
pred_vs_sim$adj.p.val <- p.adjust(pred_vs_sim$p.value, method = "bonferroni")
pred_vs_sim
#plot var
plot_ly(type = "scatter",
        mode = "markers",
        data = pred_vs_sim,
        x = ~blup_var,
        y = ~obs_var,
        name = "Observed variance vs prediction",
        hoverinfo = 'text',
        text = apply(pred_vs_sim, 1, function(l) {
          paste(names(l), ":", l, collapse = "\n")
        })
) %>%
  add_lines(inherit = FALSE,
            x = idLine$var,
            y = idLine$var,
            name = "Identity line")

3 2 Traits

3.1 simulation VS blup estimations

p <- plotBlup_2traits(blupDta = blup_estimations,
                      x_trait = 'trait1',
                      y_trait = 'trait2',
                      confidenceLevel = 0.95)

# add markers of simulated BV
p <- p %>% add_markers(inherit = FALSE,
                       data = simulation,
                       x = ~simBV_trait1,
                       y = ~simBV_trait2,
                       opacity = 0.5,
                       marker = list(symbol = 2),
                       color = sub("_X_", " X ", simulation$Cross))
p
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## 2023-11-06 16:38:07.119743 - r-plotBlup_2traits(): Check inputs ...
## 2023-11-06 16:38:07.119945 - r-plotBlup_2traits(): Check inputs DONE
## 2023-11-06 16:38:07.120005 - r-plotBlup_2traits(): draw plot ...
## 2023-11-06 16:38:07.131155 - r-plotBlup_2traits(): draw plot DONE
cross_to_keep <- c("F2_0001.0001_X_F2_0002.0059",
                   "F2_0001.0009_X_F4_0001.0147",
                   "F4_0001.0092_X_F4_0001.0185")

p <- plotBlup_2traits(blupDta = blup_estimations[names(blup_estimations) %in% cross_to_keep],
                      x_trait = 'trait1',
                      y_trait = 'trait2',
                      confidenceLevel = 0.95)


simulation_filtered <- simulation[simulation$Cross %in% cross_to_keep,]
# add jittered markers of simulated BV
p <- p %>% add_markers(inherit = FALSE,
                       data = simulation_filtered,
                       x = ~simBV_trait1,
                       y = ~simBV_trait2,
                       opacity = 0.5,
                       marker = list(symbol = 2),
                       color = sub("_X_", " X ", simulation_filtered$Cross))
p
## 2023-11-06 16:38:07.424941 - r-plotBlup_2traits(): Check inputs ...
## 2023-11-06 16:38:07.425088 - r-plotBlup_2traits(): Check inputs DONE
## 2023-11-06 16:38:07.425148 - r-plotBlup_2traits(): draw plot ...
## 2023-11-06 16:38:07.42823 - r-plotBlup_2traits(): draw plot DONE

Appendix

Session Information (click to expand)
## Document generated in:
## Time difference of 1.446892 mins
## 
## CPU: AMD Ryzen 5 3600X 6-Core Processor
## Memory total size: 32.78756 GB
## 
## 
## Session information:
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Arch Linux
## 
## Matrix products: default
## BLAS:   /usr/lib/libblas.so.3.11.0 
## LAPACK: /usr/lib/liblapack.so.3.11.0
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] dplyr_1.1.2        gaston_1.5.9       RcppParallel_5.1.7 Rcpp_1.0.10       
## [5] plotly_4.10.2      ggplot2_3.4.2     
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.6          utf8_1.2.3          generics_0.1.3     
##  [4] tidyr_1.3.0         lattice_0.21-8      digest_0.6.32      
##  [7] magrittr_2.0.3      RColorBrewer_1.1-3  evaluate_0.21      
## [10] grid_4.3.2          fastmap_1.1.1       Matrix_1.5-4.1     
## [13] jsonlite_1.8.7      ape_5.7-1           mgcv_1.8-42        
## [16] httr_1.4.6          purrr_1.0.1         fansi_1.0.4        
## [19] crosstalk_1.2.0     viridisLite_0.4.2   scales_1.2.1       
## [22] permute_0.9-7       lazyeval_0.2.2      jquerylib_0.1.4    
## [25] vcfR_1.14.0         cli_3.6.1           rlang_1.1.1        
## [28] ellipsis_0.3.2      splines_4.3.2       munsell_0.5.0      
## [31] ellipse_0.5.0       withr_2.5.0         cachem_1.0.8       
## [34] yaml_2.3.7          vegan_2.6-4         tools_4.3.2        
## [37] parallel_4.3.2      memuse_4.2-3        pinfsc50_1.2.0     
## [40] colorspace_2.1-0    vctrs_0.6.3         R6_2.5.1           
## [43] lifecycle_1.0.3     htmlwidgets_1.6.2   MASS_7.3-60        
## [46] cluster_2.1.4       pkgconfig_2.0.3     pillar_1.9.0       
## [49] bslib_0.5.0         gtable_0.3.3        data.table_1.14.8  
## [52] glue_1.6.2          xfun_0.39           tibble_3.2.1       
## [55] tidyselect_1.2.0    rstudioapi_0.15.0   knitr_1.43         
## [58] farver_2.1.1        breedSimulatR_0.3.2 htmltools_0.5.5    
## [61] nlme_3.1-162        rmarkdown_2.22      compiler_4.3.2
shiny::HTML('&nbsp;
<!-- Add icon/font library -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.13.0/css/all.min.css">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato">

<div class="footer" style="font-family: Lato">
    <hr />
    <p style="text-align: center;"><a href="https://github.com/juliendiot42">Julien Diot</a></p>
    <p style="text-align: center;"><span style="color: #808080;"><em>juliendiot@ut-biomet.org</em></span></p>

<!-- Add font awesome icons -->
    <p style="text-align: center;">
        <a href="https://github.com/juliendiot42" class="fab fa-github"></a>
        <a href="https://www.linkedin.com/in/julien-diot-949592107/" class="fab fa-linkedin"></a>
        <a href="https://orcid.org/0000-0002-8738-2034" class="fab fa-orcid"></a>
        <a href="https://keybase.io/juliendiot" class="fab fa-keybase"></a>
    </p>
</div>
&nbsp;')